• DOMAIN: Entertainment
• CONTEXT: Company X owns a movie application and repository that streams movies to millions of users on a
subscription basis. The company wants to automate the collection of cast and crew information for each scene of a
movie, so that when a user pauses the movie and clicks the cast-information button, the app shows details of the
actors in the scene. The company has in-house computer vision and multimedia experts who need to detect faces in
screenshots taken from movie scenes.
• DATA DESCRIPTION: The dataset comprises images together with a mask indicating where each human face is.
• PROJECT OBJECTIVE: Face detection from training images.
Steps and tasks:
%tensorflow_version 2.x
import tensorflow
tensorflow.__version__
'2.8.0'
import random
random.seed(0)
# Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
#Importing the Libraries
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization
from tensorflow.keras.layers import UpSampling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras import backend as K
from PIL import Image
from numpy import asarray
cd "/content/drive/MyDrive/CV2 Project"
/content/drive/MyDrive/CV2 Project
# Load the dataset: an object array (allow_pickle) of (image, annotation-list) pairs.
data = np.load('images.npy', allow_pickle=True)
# (409, 2) -> 409 samples, each holding an image and its face annotations.
data.shape
(409, 2)
The file contains 409 images and labels. Let's view a few images and their labels.
data[0][0]
array([[[42, 37, 34],
[56, 51, 48],
[71, 66, 63],
...,
[23, 33, 34],
[26, 36, 37],
[28, 38, 39]],
[[40, 35, 32],
[51, 46, 43],
[64, 59, 56],
...,
[27, 36, 35],
[24, 33, 32],
[26, 35, 34]],
[[43, 38, 35],
[51, 46, 43],
[61, 56, 53],
...,
[28, 30, 27],
[33, 35, 32],
[35, 37, 34]],
...,
[[56, 47, 40],
[57, 48, 41],
[61, 52, 45],
...,
[67, 48, 42],
[55, 35, 28],
[60, 40, 33]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[60, 40, 33],
[54, 34, 27]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[70, 50, 43],
[64, 44, 37]]], dtype=uint8)
data[408][1]
[{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.3201219512195122, 'y': 0.2839756592292089},
{'x': 0.4009146341463415, 'y': 0.4198782961460446}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.4557926829268293, 'y': 0.38742393509127787},
{'x': 0.5442073170731707, 'y': 0.5618661257606491}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.6707317073170732, 'y': 0.38336713995943206},
{'x': 0.7134146341463414, 'y': 0.4746450304259635}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.16615853658536586, 'y': 0.4665314401622718},
{'x': 0.22560975609756098, 'y': 0.563894523326572}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.07164634146341463, 'y': 0.539553752535497},
{'x': 0.11280487804878049, 'y': 0.5862068965517241}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.25, 'y': 0.24746450304259635},
{'x': 0.2850609756097561, 'y': 0.30425963488843816}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.4847560975609756, 'y': 0.2332657200811359},
{'x': 0.5274390243902439, 'y': 0.3225152129817444}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.5929878048780488, 'y': 0.31643002028397565},
{'x': 0.6204268292682927, 'y': 0.359026369168357}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.7728658536585366, 'y': 0.41379310344827586},
{'x': 0.801829268292683, 'y': 0.45841784989858014}]},
{'imageHeight': 493,
'imageWidth': 656,
'label': ['Face'],
'notes': '',
'points': [{'x': 0.8033536585365854, 'y': 0.5152129817444219},
{'x': 0.850609756097561, 'y': 0.6267748478701826}]}]
Viewing a few random images and labels in the dataset
# cv2_imshow is Colab's notebook-friendly replacement for cv2.imshow
# (which requires a GUI window and is unavailable in Colab).
from google.colab.patches import cv2_imshow
# Display one sample image from the dataset.
cv2_imshow(data[24][0])
# Show 30 evenly spaced samples (every 12th image) on a 10x3 grid,
# filling the grid row by row.
fig, axes = plt.subplots(10, 3, figsize=(20, 30))
for i in range(30):
    r, c = divmod(i, 3)
    axes[r][c].imshow(data[i * 12][0], interpolation='nearest')
plt.show()
Creating features (images) and labels (masks)
from tensorflow.keras.applications.mobilenet import preprocess_input

# Target spatial size expected by the MobileNet encoder.
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
HEIGHT_CELLS = 28
WIDTH_CELLS = 28
IMAGE_SIZE = 224

# masks: one binary (0/1) face mask per sample; X: preprocessed RGB inputs.
masks = np.zeros((int(data.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH))
X = np.zeros((int(data.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH, 3))

for index in range(data.shape[0]):
    img = data[index][0]
    # NOTE: cv2.resize takes dsize as (width, height); both are 224 here.
    img = cv2.resize(img, dsize=(IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_CUBIC)
    if img.ndim == 2:
        # Grayscale image: replicate the single channel so every sample has
        # 3 channels. BUG FIX: the original code converted and then
        # `continue`d, discarding the converted image and leaving X[index]
        # and masks[index] all-zero — that sample was silently lost.
        print(f"Exception {index} Grayscale image with shape {img.shape}")
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    else:
        # Keep only the first 3 channels (drops an alpha channel if present).
        img = img[:, :, :3]
    X[index] = preprocess_input(np.array(img, dtype=np.float32))
    # Rasterize each face's normalized corner coordinates into the binary mask.
    for i in data[index][1]:
        x1 = int(i['points'][0]['x'] * IMAGE_WIDTH)
        x2 = int(i['points'][1]['x'] * IMAGE_WIDTH)
        y1 = int(i['points'][0]['y'] * IMAGE_HEIGHT)
        y2 = int(i['points'][1]['y'] * IMAGE_HEIGHT)
        # Set all pixels within the box to 1 (face present).
        masks[index][y1:y2, x1:x2] = 1
print(f"### Shape of X is '{X.shape}' and the shape of mask is '{masks.shape}' ")
Exception 272 Grayscale image with shape (224, 224) ### Shape of X is '(409, 224, 224, 3)' and the shape of mask is '(409, 224, 224)'
Splitting the data into train and test sets
from sklearn.model_selection import train_test_split

# Fix random_state so the partition is reproducible across runs —
# random.seed(0) above does not seed sklearn's shuffling.
# First carve out 20% for validation+test, then split that hold-out 80/20,
# giving roughly 80% train / 16% val / 4% test.
X_train, X_test, y_train, y_test = train_test_split(X, masks, test_size=0.2, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.2, random_state=0)
print(f"Shape of X_train is '{X_train.shape}' and the shape of y_train is '{y_train.shape}'")
print(f"Shape of X_val is '{X_val.shape}' and the shape of y_val is '{y_val.shape}'")
print(f"Shape of X_test is '{X_test.shape}' and the shape of y_test is '{y_test.shape}'")
Shape of X_train is '(327, 224, 224, 3)' and the shape of y_train is '(327, 224, 224)' Shape of X_val is '(65, 224, 224, 3)' and the shape of y_val is '(65, 224, 224)' Shape of X_test is '(17, 224, 224, 3)' and the shape of y_test is '(17, 224, 224)'
Visualizing X_train and y_train images
# X_train was scaled to [-1, 1] by preprocess_input, and imshow clips RGB
# floats to [0, 1] — that caused the "Clipping input data" warnings and
# washed-out images. Rescale to [0, 1] for display only.
# (set_clim has no effect on RGB images, so the original calls were no-ops
# and are dropped; for the 0/1 masks the default color limits are correct.)
fig = plt.figure(figsize=(15, 15))
for pos, idx in enumerate([0, 10, 20, 30], start=1):
    fig.add_subplot(1, 4, pos)
    plt.imshow((X_train[idx] + 1.0) / 2.0)

# The corresponding binary face masks (values 0/1).
fig = plt.figure(figsize=(15, 15))
for pos, idx in enumerate([0, 10, 20, 30], start=1):
    fig.add_subplot(1, 4, pos)
    plt.imshow(y_train[idx])
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Creating a Mask Detection Model using U-net with MobileNet Transfer Learning Model
# Training hyperparameters.
IMAGE_SIZE = 224  # input resolution fed to the network (matches preprocessing)
EPOCHS = 30       # maximum epochs (EarlyStopping below may stop sooner)
BATCH = 8         # mini-batch size
LR = 1e-4         # initial learning rate
def model():
    """Assemble a U-Net-style face-segmentation network.

    Encoder: MobileNetV2 (alpha=0.35, ImageNet weights) truncated at
    `block_13_expand_relu`. Decoder: four upsampling stages, each
    concatenating the matching encoder activation (skip connection) before
    two Conv-BN-ReLU layers. A final 1x1 convolution with sigmoid produces
    a one-channel per-pixel face-probability map at full input resolution.
    """
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")
    encoder = MobileNetV2(input_tensor=inputs, weights="imagenet",
                          include_top=False, alpha=0.35)

    # Encoder activations tapped for skip connections, deepest first,
    # paired with the number of decoder filters used at that stage.
    decoder_plan = [
        ("block_6_expand_relu", 64),
        ("block_3_expand_relu", 48),
        ("block_1_expand_relu", 32),
        ("input_image", 16),
    ]

    x = encoder.get_layer("block_13_expand_relu").output
    for skip_name, n_filters in decoder_plan:
        x = UpSampling2D((2, 2))(x)
        x = Concatenate()([x, encoder.get_layer(skip_name).output])
        for _ in range(2):
            x = Conv2D(n_filters, (3, 3), padding="same")(x)
            x = BatchNormalization()(x)
            x = Activation("relu")(x)

    # 1x1 conv + sigmoid -> per-pixel probability in [0, 1].
    x = Conv2D(1, (1, 1), padding="same")(x)
    x = Activation("sigmoid")(x)
    return Model(inputs, x)
# NOTE(review): this rebinds the name `model` from the builder function to the
# built Model instance, so model() cannot be called a second time afterwards.
model = model()
# Print the full layer-by-layer architecture and parameter counts.
model.summary()
WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not in [96, 128, 160, 192, 224]. Weights for input shape (224, 224) will be loaded as the default.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_0.35_224_no_top.h5
2023424/2019640 [==============================] - 0s 0us/step
2031616/2019640 [==============================] - 0s 0us/step
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_image (InputLayer) [(None, 224, 224, 3 0 []
)]
Conv1 (Conv2D) (None, 112, 112, 16 432 ['input_image[0][0]']
)
bn_Conv1 (BatchNormalization) (None, 112, 112, 16 64 ['Conv1[0][0]']
)
Conv1_relu (ReLU) (None, 112, 112, 16 0 ['bn_Conv1[0][0]']
)
expanded_conv_depthwise (Depth (None, 112, 112, 16 144 ['Conv1_relu[0][0]']
wiseConv2D) )
expanded_conv_depthwise_BN (Ba (None, 112, 112, 16 64 ['expanded_conv_depthwise[0][0]']
tchNormalization) )
expanded_conv_depthwise_relu ( (None, 112, 112, 16 0 ['expanded_conv_depthwise_BN[0][0
ReLU) ) ]']
expanded_conv_project (Conv2D) (None, 112, 112, 8) 128 ['expanded_conv_depthwise_relu[0]
[0]']
expanded_conv_project_BN (Batc (None, 112, 112, 8) 32 ['expanded_conv_project[0][0]']
hNormalization)
block_1_expand (Conv2D) (None, 112, 112, 48 384 ['expanded_conv_project_BN[0][0]'
) ]
block_1_expand_BN (BatchNormal (None, 112, 112, 48 192 ['block_1_expand[0][0]']
ization) )
block_1_expand_relu (ReLU) (None, 112, 112, 48 0 ['block_1_expand_BN[0][0]']
)
block_1_pad (ZeroPadding2D) (None, 113, 113, 48 0 ['block_1_expand_relu[0][0]']
)
block_1_depthwise (DepthwiseCo (None, 56, 56, 48) 432 ['block_1_pad[0][0]']
nv2D)
block_1_depthwise_BN (BatchNor (None, 56, 56, 48) 192 ['block_1_depthwise[0][0]']
malization)
block_1_depthwise_relu (ReLU) (None, 56, 56, 48) 0 ['block_1_depthwise_BN[0][0]']
block_1_project (Conv2D) (None, 56, 56, 8) 384 ['block_1_depthwise_relu[0][0]']
block_1_project_BN (BatchNorma (None, 56, 56, 8) 32 ['block_1_project[0][0]']
lization)
block_2_expand (Conv2D) (None, 56, 56, 48) 384 ['block_1_project_BN[0][0]']
block_2_expand_BN (BatchNormal (None, 56, 56, 48) 192 ['block_2_expand[0][0]']
ization)
block_2_expand_relu (ReLU) (None, 56, 56, 48) 0 ['block_2_expand_BN[0][0]']
block_2_depthwise (DepthwiseCo (None, 56, 56, 48) 432 ['block_2_expand_relu[0][0]']
nv2D)
block_2_depthwise_BN (BatchNor (None, 56, 56, 48) 192 ['block_2_depthwise[0][0]']
malization)
block_2_depthwise_relu (ReLU) (None, 56, 56, 48) 0 ['block_2_depthwise_BN[0][0]']
block_2_project (Conv2D) (None, 56, 56, 8) 384 ['block_2_depthwise_relu[0][0]']
block_2_project_BN (BatchNorma (None, 56, 56, 8) 32 ['block_2_project[0][0]']
lization)
block_2_add (Add) (None, 56, 56, 8) 0 ['block_1_project_BN[0][0]',
'block_2_project_BN[0][0]']
block_3_expand (Conv2D) (None, 56, 56, 48) 384 ['block_2_add[0][0]']
block_3_expand_BN (BatchNormal (None, 56, 56, 48) 192 ['block_3_expand[0][0]']
ization)
block_3_expand_relu (ReLU) (None, 56, 56, 48) 0 ['block_3_expand_BN[0][0]']
block_3_pad (ZeroPadding2D) (None, 57, 57, 48) 0 ['block_3_expand_relu[0][0]']
block_3_depthwise (DepthwiseCo (None, 28, 28, 48) 432 ['block_3_pad[0][0]']
nv2D)
block_3_depthwise_BN (BatchNor (None, 28, 28, 48) 192 ['block_3_depthwise[0][0]']
malization)
block_3_depthwise_relu (ReLU) (None, 28, 28, 48) 0 ['block_3_depthwise_BN[0][0]']
block_3_project (Conv2D) (None, 28, 28, 16) 768 ['block_3_depthwise_relu[0][0]']
block_3_project_BN (BatchNorma (None, 28, 28, 16) 64 ['block_3_project[0][0]']
lization)
block_4_expand (Conv2D) (None, 28, 28, 96) 1536 ['block_3_project_BN[0][0]']
block_4_expand_BN (BatchNormal (None, 28, 28, 96) 384 ['block_4_expand[0][0]']
ization)
block_4_expand_relu (ReLU) (None, 28, 28, 96) 0 ['block_4_expand_BN[0][0]']
block_4_depthwise (DepthwiseCo (None, 28, 28, 96) 864 ['block_4_expand_relu[0][0]']
nv2D)
block_4_depthwise_BN (BatchNor (None, 28, 28, 96) 384 ['block_4_depthwise[0][0]']
malization)
block_4_depthwise_relu (ReLU) (None, 28, 28, 96) 0 ['block_4_depthwise_BN[0][0]']
block_4_project (Conv2D) (None, 28, 28, 16) 1536 ['block_4_depthwise_relu[0][0]']
block_4_project_BN (BatchNorma (None, 28, 28, 16) 64 ['block_4_project[0][0]']
lization)
block_4_add (Add) (None, 28, 28, 16) 0 ['block_3_project_BN[0][0]',
'block_4_project_BN[0][0]']
block_5_expand (Conv2D) (None, 28, 28, 96) 1536 ['block_4_add[0][0]']
block_5_expand_BN (BatchNormal (None, 28, 28, 96) 384 ['block_5_expand[0][0]']
ization)
block_5_expand_relu (ReLU) (None, 28, 28, 96) 0 ['block_5_expand_BN[0][0]']
block_5_depthwise (DepthwiseCo (None, 28, 28, 96) 864 ['block_5_expand_relu[0][0]']
nv2D)
block_5_depthwise_BN (BatchNor (None, 28, 28, 96) 384 ['block_5_depthwise[0][0]']
malization)
block_5_depthwise_relu (ReLU) (None, 28, 28, 96) 0 ['block_5_depthwise_BN[0][0]']
block_5_project (Conv2D) (None, 28, 28, 16) 1536 ['block_5_depthwise_relu[0][0]']
block_5_project_BN (BatchNorma (None, 28, 28, 16) 64 ['block_5_project[0][0]']
lization)
block_5_add (Add) (None, 28, 28, 16) 0 ['block_4_add[0][0]',
'block_5_project_BN[0][0]']
block_6_expand (Conv2D) (None, 28, 28, 96) 1536 ['block_5_add[0][0]']
block_6_expand_BN (BatchNormal (None, 28, 28, 96) 384 ['block_6_expand[0][0]']
ization)
block_6_expand_relu (ReLU) (None, 28, 28, 96) 0 ['block_6_expand_BN[0][0]']
block_6_pad (ZeroPadding2D) (None, 29, 29, 96) 0 ['block_6_expand_relu[0][0]']
block_6_depthwise (DepthwiseCo (None, 14, 14, 96) 864 ['block_6_pad[0][0]']
nv2D)
block_6_depthwise_BN (BatchNor (None, 14, 14, 96) 384 ['block_6_depthwise[0][0]']
malization)
block_6_depthwise_relu (ReLU) (None, 14, 14, 96) 0 ['block_6_depthwise_BN[0][0]']
block_6_project (Conv2D) (None, 14, 14, 24) 2304 ['block_6_depthwise_relu[0][0]']
block_6_project_BN (BatchNorma (None, 14, 14, 24) 96 ['block_6_project[0][0]']
lization)
block_7_expand (Conv2D) (None, 14, 14, 144) 3456 ['block_6_project_BN[0][0]']
block_7_expand_BN (BatchNormal (None, 14, 14, 144) 576 ['block_7_expand[0][0]']
ization)
block_7_expand_relu (ReLU) (None, 14, 14, 144) 0 ['block_7_expand_BN[0][0]']
block_7_depthwise (DepthwiseCo (None, 14, 14, 144) 1296 ['block_7_expand_relu[0][0]']
nv2D)
block_7_depthwise_BN (BatchNor (None, 14, 14, 144) 576 ['block_7_depthwise[0][0]']
malization)
block_7_depthwise_relu (ReLU) (None, 14, 14, 144) 0 ['block_7_depthwise_BN[0][0]']
block_7_project (Conv2D) (None, 14, 14, 24) 3456 ['block_7_depthwise_relu[0][0]']
block_7_project_BN (BatchNorma (None, 14, 14, 24) 96 ['block_7_project[0][0]']
lization)
block_7_add (Add) (None, 14, 14, 24) 0 ['block_6_project_BN[0][0]',
'block_7_project_BN[0][0]']
block_8_expand (Conv2D) (None, 14, 14, 144) 3456 ['block_7_add[0][0]']
block_8_expand_BN (BatchNormal (None, 14, 14, 144) 576 ['block_8_expand[0][0]']
ization)
block_8_expand_relu (ReLU) (None, 14, 14, 144) 0 ['block_8_expand_BN[0][0]']
block_8_depthwise (DepthwiseCo (None, 14, 14, 144) 1296 ['block_8_expand_relu[0][0]']
nv2D)
block_8_depthwise_BN (BatchNor (None, 14, 14, 144) 576 ['block_8_depthwise[0][0]']
malization)
block_8_depthwise_relu (ReLU) (None, 14, 14, 144) 0 ['block_8_depthwise_BN[0][0]']
block_8_project (Conv2D) (None, 14, 14, 24) 3456 ['block_8_depthwise_relu[0][0]']
block_8_project_BN (BatchNorma (None, 14, 14, 24) 96 ['block_8_project[0][0]']
lization)
block_8_add (Add) (None, 14, 14, 24) 0 ['block_7_add[0][0]',
'block_8_project_BN[0][0]']
block_9_expand (Conv2D) (None, 14, 14, 144) 3456 ['block_8_add[0][0]']
block_9_expand_BN (BatchNormal (None, 14, 14, 144) 576 ['block_9_expand[0][0]']
ization)
block_9_expand_relu (ReLU) (None, 14, 14, 144) 0 ['block_9_expand_BN[0][0]']
block_9_depthwise (DepthwiseCo (None, 14, 14, 144) 1296 ['block_9_expand_relu[0][0]']
nv2D)
block_9_depthwise_BN (BatchNor (None, 14, 14, 144) 576 ['block_9_depthwise[0][0]']
malization)
block_9_depthwise_relu (ReLU) (None, 14, 14, 144) 0 ['block_9_depthwise_BN[0][0]']
block_9_project (Conv2D) (None, 14, 14, 24) 3456 ['block_9_depthwise_relu[0][0]']
block_9_project_BN (BatchNorma (None, 14, 14, 24) 96 ['block_9_project[0][0]']
lization)
block_9_add (Add) (None, 14, 14, 24) 0 ['block_8_add[0][0]',
'block_9_project_BN[0][0]']
block_10_expand (Conv2D) (None, 14, 14, 144) 3456 ['block_9_add[0][0]']
block_10_expand_BN (BatchNorma (None, 14, 14, 144) 576 ['block_10_expand[0][0]']
lization)
block_10_expand_relu (ReLU) (None, 14, 14, 144) 0 ['block_10_expand_BN[0][0]']
block_10_depthwise (DepthwiseC (None, 14, 14, 144) 1296 ['block_10_expand_relu[0][0]']
onv2D)
block_10_depthwise_BN (BatchNo (None, 14, 14, 144) 576 ['block_10_depthwise[0][0]']
rmalization)
block_10_depthwise_relu (ReLU) (None, 14, 14, 144) 0 ['block_10_depthwise_BN[0][0]']
block_10_project (Conv2D) (None, 14, 14, 32) 4608 ['block_10_depthwise_relu[0][0]']
block_10_project_BN (BatchNorm (None, 14, 14, 32) 128 ['block_10_project[0][0]']
alization)
block_11_expand (Conv2D) (None, 14, 14, 192) 6144 ['block_10_project_BN[0][0]']
block_11_expand_BN (BatchNorma (None, 14, 14, 192) 768 ['block_11_expand[0][0]']
lization)
block_11_expand_relu (ReLU) (None, 14, 14, 192) 0 ['block_11_expand_BN[0][0]']
block_11_depthwise (DepthwiseC (None, 14, 14, 192) 1728 ['block_11_expand_relu[0][0]']
onv2D)
block_11_depthwise_BN (BatchNo (None, 14, 14, 192) 768 ['block_11_depthwise[0][0]']
rmalization)
block_11_depthwise_relu (ReLU) (None, 14, 14, 192) 0 ['block_11_depthwise_BN[0][0]']
block_11_project (Conv2D) (None, 14, 14, 32) 6144 ['block_11_depthwise_relu[0][0]']
block_11_project_BN (BatchNorm (None, 14, 14, 32) 128 ['block_11_project[0][0]']
alization)
block_11_add (Add) (None, 14, 14, 32) 0 ['block_10_project_BN[0][0]',
'block_11_project_BN[0][0]']
block_12_expand (Conv2D) (None, 14, 14, 192) 6144 ['block_11_add[0][0]']
block_12_expand_BN (BatchNorma (None, 14, 14, 192) 768 ['block_12_expand[0][0]']
lization)
block_12_expand_relu (ReLU) (None, 14, 14, 192) 0 ['block_12_expand_BN[0][0]']
block_12_depthwise (DepthwiseC (None, 14, 14, 192) 1728 ['block_12_expand_relu[0][0]']
onv2D)
block_12_depthwise_BN (BatchNo (None, 14, 14, 192) 768 ['block_12_depthwise[0][0]']
rmalization)
block_12_depthwise_relu (ReLU) (None, 14, 14, 192) 0 ['block_12_depthwise_BN[0][0]']
block_12_project (Conv2D) (None, 14, 14, 32) 6144 ['block_12_depthwise_relu[0][0]']
block_12_project_BN (BatchNorm (None, 14, 14, 32) 128 ['block_12_project[0][0]']
alization)
block_12_add (Add) (None, 14, 14, 32) 0 ['block_11_add[0][0]',
'block_12_project_BN[0][0]']
block_13_expand (Conv2D) (None, 14, 14, 192) 6144 ['block_12_add[0][0]']
block_13_expand_BN (BatchNorma (None, 14, 14, 192) 768 ['block_13_expand[0][0]']
lization)
block_13_expand_relu (ReLU) (None, 14, 14, 192) 0 ['block_13_expand_BN[0][0]']
up_sampling2d (UpSampling2D) (None, 28, 28, 192) 0 ['block_13_expand_relu[0][0]']
concatenate (Concatenate) (None, 28, 28, 288) 0 ['up_sampling2d[0][0]',
'block_6_expand_relu[0][0]']
conv2d (Conv2D) (None, 28, 28, 64) 165952 ['concatenate[0][0]']
batch_normalization (BatchNorm (None, 28, 28, 64) 256 ['conv2d[0][0]']
alization)
activation (Activation) (None, 28, 28, 64) 0 ['batch_normalization[0][0]']
conv2d_1 (Conv2D) (None, 28, 28, 64) 36928 ['activation[0][0]']
batch_normalization_1 (BatchNo (None, 28, 28, 64) 256 ['conv2d_1[0][0]']
rmalization)
activation_1 (Activation) (None, 28, 28, 64) 0 ['batch_normalization_1[0][0]']
up_sampling2d_1 (UpSampling2D) (None, 56, 56, 64) 0 ['activation_1[0][0]']
concatenate_1 (Concatenate) (None, 56, 56, 112) 0 ['up_sampling2d_1[0][0]',
'block_3_expand_relu[0][0]']
conv2d_2 (Conv2D) (None, 56, 56, 48) 48432 ['concatenate_1[0][0]']
batch_normalization_2 (BatchNo (None, 56, 56, 48) 192 ['conv2d_2[0][0]']
rmalization)
activation_2 (Activation) (None, 56, 56, 48) 0 ['batch_normalization_2[0][0]']
conv2d_3 (Conv2D) (None, 56, 56, 48) 20784 ['activation_2[0][0]']
batch_normalization_3 (BatchNo (None, 56, 56, 48) 192 ['conv2d_3[0][0]']
rmalization)
activation_3 (Activation) (None, 56, 56, 48) 0 ['batch_normalization_3[0][0]']
up_sampling2d_2 (UpSampling2D) (None, 112, 112, 48 0 ['activation_3[0][0]']
)
concatenate_2 (Concatenate) (None, 112, 112, 96 0 ['up_sampling2d_2[0][0]',
) 'block_1_expand_relu[0][0]']
conv2d_4 (Conv2D) (None, 112, 112, 32 27680 ['concatenate_2[0][0]']
)
batch_normalization_4 (BatchNo (None, 112, 112, 32 128 ['conv2d_4[0][0]']
rmalization) )
activation_4 (Activation) (None, 112, 112, 32 0 ['batch_normalization_4[0][0]']
)
conv2d_5 (Conv2D) (None, 112, 112, 32 9248 ['activation_4[0][0]']
)
batch_normalization_5 (BatchNo (None, 112, 112, 32 128 ['conv2d_5[0][0]']
rmalization) )
activation_5 (Activation) (None, 112, 112, 32 0 ['batch_normalization_5[0][0]']
)
up_sampling2d_3 (UpSampling2D) (None, 224, 224, 32 0 ['activation_5[0][0]']
)
concatenate_3 (Concatenate) (None, 224, 224, 35 0 ['up_sampling2d_3[0][0]',
) 'input_image[0][0]']
conv2d_6 (Conv2D) (None, 224, 224, 16 5056 ['concatenate_3[0][0]']
)
batch_normalization_6 (BatchNo (None, 224, 224, 16 64 ['conv2d_6[0][0]']
rmalization) )
activation_6 (Activation) (None, 224, 224, 16 0 ['batch_normalization_6[0][0]']
)
conv2d_7 (Conv2D) (None, 224, 224, 16 2320 ['activation_6[0][0]']
)
batch_normalization_7 (BatchNo (None, 224, 224, 16 64 ['conv2d_7[0][0]']
rmalization) )
activation_7 (Activation) (None, 224, 224, 16 0 ['batch_normalization_7[0][0]']
)
conv2d_8 (Conv2D) (None, 224, 224, 1) 17 ['activation_7[0][0]']
activation_8 (Activation) (None, 224, 224, 1) 0 ['conv2d_8[0][0]']
==================================================================================================
Total params: 416,209
Trainable params: 409,025
Non-trainable params: 7,184
__________________________________________________________________________________________________
Designing Dice Coefficient and Loss function
smooth = 1e-15
def dice_coef(y_true, y_pred):
y_true = tf.keras.layers.Flatten()(y_true)
y_pred = tf.keras.layers.Flatten()(y_pred)
intersection = tf.reduce_sum(y_true * y_pred)
return (2. * intersection + smooth) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + smooth)
def dice_loss(y_true, y_pred):
    """Segmentation loss: 1 - Dice coefficient (decreases as overlap improves)."""
    return 1.0 - dice_coef(y_true, y_pred)
Compiling the Model
# Compile the model with Dice loss; Nadam with the configured learning rate LR.
opt = tf.keras.optimizers.Nadam(LR)
metrics = [dice_coef, Recall(), Precision()]
model.compile(loss=dice_loss, optimizer=opt, metrics=metrics)
callbacks = [
    # Shrink the learning rate 10x when val_loss plateaus for 4 epochs.
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
    # FIX: restore_best_weights=True so that when early stopping triggers the
    # model keeps the weights from the epoch with the lowest val_loss instead
    # of the (possibly overfit) final-epoch weights.
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
]
Training the Model
# Train the segmentation model. Step counts use ceiling division so the
# final partial batch of each split is still consumed every epoch.
train_steps = (len(X_train) + BATCH - 1) // BATCH
valid_steps = (len(X_val) + BATCH - 1) // BATCH
model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_steps=valid_steps,
    callbacks=callbacks,
)
Epoch 1/30 41/41 [==============================] - 166s 4s/step - loss: 0.7108 - dice_coef: 0.2897 - recall: 0.7593 - precision: 0.2299 - val_loss: 0.6771 - val_dice_coef: 0.2981 - val_recall: 0.9554 - val_precision: 0.2574 - lr: 1.0000e-04 Epoch 2/30 41/41 [==============================] - 142s 3s/step - loss: 0.5818 - dice_coef: 0.4182 - recall: 0.8950 - precision: 0.3984 - val_loss: 0.6208 - val_dice_coef: 0.3503 - val_recall: 0.9498 - val_precision: 0.3373 - lr: 1.0000e-04 Epoch 3/30 41/41 [==============================] - 142s 3s/step - loss: 0.5352 - dice_coef: 0.4644 - recall: 0.8970 - precision: 0.4673 - val_loss: 0.6174 - val_dice_coef: 0.3528 - val_recall: 0.9583 - val_precision: 0.3459 - lr: 1.0000e-04 Epoch 4/30 41/41 [==============================] - 142s 3s/step - loss: 0.5088 - dice_coef: 0.4913 - recall: 0.8958 - precision: 0.5041 - val_loss: 0.6100 - val_dice_coef: 0.3604 - val_recall: 0.9574 - val_precision: 0.3499 - lr: 1.0000e-04 Epoch 5/30 41/41 [==============================] - 142s 3s/step - loss: 0.4905 - dice_coef: 0.5099 - recall: 0.8988 - precision: 0.5338 - val_loss: 0.5874 - val_dice_coef: 0.3817 - val_recall: 0.9487 - val_precision: 0.3716 - lr: 1.0000e-04 Epoch 6/30 41/41 [==============================] - 142s 3s/step - loss: 0.4781 - dice_coef: 0.5224 - recall: 0.9011 - precision: 0.5476 - val_loss: 0.5505 - val_dice_coef: 0.4162 - val_recall: 0.9213 - val_precision: 0.4402 - lr: 1.0000e-04 Epoch 7/30 41/41 [==============================] - 143s 3s/step - loss: 0.4589 - dice_coef: 0.5412 - recall: 0.9033 - precision: 0.5702 - val_loss: 0.5898 - val_dice_coef: 0.3790 - val_recall: 0.9579 - val_precision: 0.3424 - lr: 1.0000e-04 Epoch 8/30 41/41 [==============================] - 143s 3s/step - loss: 0.4456 - dice_coef: 0.5545 - recall: 0.9020 - precision: 0.5912 - val_loss: 0.5390 - val_dice_coef: 0.4277 - val_recall: 0.9184 - val_precision: 0.4379 - lr: 1.0000e-04 Epoch 9/30 41/41 [==============================] - 142s 
3s/step - loss: 0.4326 - dice_coef: 0.5670 - recall: 0.9028 - precision: 0.6102 - val_loss: 0.5041 - val_dice_coef: 0.4625 - val_recall: 0.8356 - val_precision: 0.5651 - lr: 1.0000e-04 Epoch 10/30 41/41 [==============================] - 142s 3s/step - loss: 0.4137 - dice_coef: 0.5863 - recall: 0.9026 - precision: 0.6390 - val_loss: 0.5012 - val_dice_coef: 0.4642 - val_recall: 0.8436 - val_precision: 0.5483 - lr: 1.0000e-04 Epoch 11/30 41/41 [==============================] - 142s 3s/step - loss: 0.4155 - dice_coef: 0.5848 - recall: 0.8826 - precision: 0.6444 - val_loss: 0.4930 - val_dice_coef: 0.4734 - val_recall: 0.8382 - val_precision: 0.5486 - lr: 1.0000e-04 Epoch 12/30 41/41 [==============================] - 142s 3s/step - loss: 0.3944 - dice_coef: 0.6053 - recall: 0.9109 - precision: 0.6566 - val_loss: 0.4875 - val_dice_coef: 0.4808 - val_recall: 0.7130 - val_precision: 0.6604 - lr: 1.0000e-04 Epoch 13/30 41/41 [==============================] - 142s 3s/step - loss: 0.3829 - dice_coef: 0.6169 - recall: 0.8945 - precision: 0.6899 - val_loss: 0.4785 - val_dice_coef: 0.4898 - val_recall: 0.6875 - val_precision: 0.6722 - lr: 1.0000e-04 Epoch 14/30 41/41 [==============================] - 142s 3s/step - loss: 0.3795 - dice_coef: 0.6198 - recall: 0.9005 - precision: 0.6822 - val_loss: 0.4722 - val_dice_coef: 0.4970 - val_recall: 0.6673 - val_precision: 0.6919 - lr: 1.0000e-04 Epoch 15/30 41/41 [==============================] - 142s 3s/step - loss: 0.3680 - dice_coef: 0.6317 - recall: 0.9005 - precision: 0.6989 - val_loss: 0.4971 - val_dice_coef: 0.4795 - val_recall: 0.5607 - val_precision: 0.7660 - lr: 1.0000e-04 Epoch 16/30 41/41 [==============================] - 142s 3s/step - loss: 0.3468 - dice_coef: 0.6529 - recall: 0.9143 - precision: 0.7192 - val_loss: 0.5236 - val_dice_coef: 0.4587 - val_recall: 0.4910 - val_precision: 0.8029 - lr: 1.0000e-04 Epoch 17/30 41/41 [==============================] - 143s 3s/step - loss: 0.3482 - dice_coef: 0.6515 - recall: 
0.8958 - precision: 0.7224 - val_loss: 0.4599 - val_dice_coef: 0.5122 - val_recall: 0.6349 - val_precision: 0.7107 - lr: 1.0000e-04 Epoch 18/30 41/41 [==============================] - 143s 3s/step - loss: 0.3291 - dice_coef: 0.6709 - recall: 0.9148 - precision: 0.7414 - val_loss: 0.4641 - val_dice_coef: 0.5074 - val_recall: 0.6051 - val_precision: 0.7259 - lr: 1.0000e-04 Epoch 19/30 41/41 [==============================] - 143s 3s/step - loss: 0.3241 - dice_coef: 0.6757 - recall: 0.9085 - precision: 0.7489 - val_loss: 0.4582 - val_dice_coef: 0.5148 - val_recall: 0.6228 - val_precision: 0.7109 - lr: 1.0000e-04 Epoch 20/30 41/41 [==============================] - 142s 3s/step - loss: 0.3130 - dice_coef: 0.6871 - recall: 0.9116 - precision: 0.7588 - val_loss: 0.4901 - val_dice_coef: 0.4874 - val_recall: 0.5223 - val_precision: 0.7746 - lr: 1.0000e-04 Epoch 21/30 41/41 [==============================] - 143s 3s/step - loss: 0.2968 - dice_coef: 0.7030 - recall: 0.9210 - precision: 0.7769 - val_loss: 0.4519 - val_dice_coef: 0.5190 - val_recall: 0.6088 - val_precision: 0.7246 - lr: 1.0000e-04 Epoch 22/30 41/41 [==============================] - 142s 3s/step - loss: 0.3016 - dice_coef: 0.6980 - recall: 0.9150 - precision: 0.7740 - val_loss: 0.4641 - val_dice_coef: 0.5098 - val_recall: 0.5559 - val_precision: 0.7739 - lr: 1.0000e-04 Epoch 23/30 41/41 [==============================] - 143s 3s/step - loss: 0.2860 - dice_coef: 0.7141 - recall: 0.9132 - precision: 0.7925 - val_loss: 0.4473 - val_dice_coef: 0.5248 - val_recall: 0.6022 - val_precision: 0.7224 - lr: 1.0000e-04 Epoch 24/30 41/41 [==============================] - 142s 3s/step - loss: 0.2853 - dice_coef: 0.7145 - recall: 0.9111 - precision: 0.7917 - val_loss: 0.4241 - val_dice_coef: 0.5437 - val_recall: 0.6745 - val_precision: 0.6724 - lr: 1.0000e-04 Epoch 25/30 41/41 [==============================] - 142s 3s/step - loss: 0.2658 - dice_coef: 0.7343 - recall: 0.9207 - precision: 0.8085 - val_loss: 0.4604 - 
val_dice_coef: 0.5161 - val_recall: 0.5385 - val_precision: 0.7669 - lr: 1.0000e-04 Epoch 26/30 41/41 [==============================] - 141s 3s/step - loss: 0.2607 - dice_coef: 0.7394 - recall: 0.9179 - precision: 0.8136 - val_loss: 0.4465 - val_dice_coef: 0.5298 - val_recall: 0.5619 - val_precision: 0.7524 - lr: 1.0000e-04 Epoch 27/30 41/41 [==============================] - 143s 3s/step - loss: 0.2645 - dice_coef: 0.7357 - recall: 0.9143 - precision: 0.8126 - val_loss: 0.4202 - val_dice_coef: 0.5443 - val_recall: 0.6592 - val_precision: 0.6746 - lr: 1.0000e-04 Epoch 28/30 41/41 [==============================] - 142s 3s/step - loss: 0.2561 - dice_coef: 0.7434 - recall: 0.9122 - precision: 0.8183 - val_loss: 0.5472 - val_dice_coef: 0.4481 - val_recall: 0.3727 - val_precision: 0.8687 - lr: 1.0000e-04 Epoch 29/30 41/41 [==============================] - 141s 3s/step - loss: 0.2415 - dice_coef: 0.7587 - recall: 0.9147 - precision: 0.8347 - val_loss: 0.4350 - val_dice_coef: 0.5347 - val_recall: 0.5799 - val_precision: 0.7293 - lr: 1.0000e-04 Epoch 30/30 41/41 [==============================] - 142s 3s/step - loss: 0.2320 - dice_coef: 0.7681 - recall: 0.9224 - precision: 0.8440 - val_loss: 0.4450 - val_dice_coef: 0.5333 - val_recall: 0.5345 - val_precision: 0.7683 - lr: 1.0000e-04
<keras.callbacks.History at 0x7ff7687eded0>
Evaluating the Model
# Evaluate on the held-out test split; ceiling division so the last
# partial batch is included as well.
test_steps = (len(X_test) + BATCH - 1) // BATCH
model.evaluate(X_test, y_test, steps=test_steps)
3/3 [==============================] - 2s 601ms/step - loss: 0.4382 - dice_coef: 0.5663 - recall: 0.5353 - precision: 0.8067
[0.4382132887840271, 0.5662946701049805, 0.5352588295936584, 0.8066996932029724]
The model has precision and recall of 80.67% and 53.53% respectively on the test set. The loss is 0.438 and the dice coefficient is 0.5663.
Predicting an image that was not used for training and testing the model
# Load a held-out image and preprocess it for the face-segmentation model.
filename = '/content/drive/MyDrive/CV2 Project/Part 1Test Data - Prediction Image.jpeg'
unscaled = cv2.imread(filename)
# NOTE(review): cv2.imread returns BGR channel order (and None if the path is
# wrong) -- confirm the training images used the same channel order.
image = cv2.resize(unscaled, (IMAGE_WIDTH, IMAGE_HEIGHT))
# preprocess_input scales pixels into [-1, 1] (visible in the echoed array below).
feat_scaled = preprocess_input(np.array(image, dtype=np.float32))
feat_scaled
array([[[ 0.5686275 , 0.4666667 , 0.45098042],
[ 0.56078434, 0.45882356, 0.4431373 ],
[ 0.5764706 , 0.47450984, 0.45882356],
...,
[-0.17647058, -0.5058824 , -0.73333335],
[-0.27058822, -0.49019605, -0.75686276],
[-0.27843136, -0.47450978, -0.7411765 ]],
[[ 0.6 , 0.49803925, 0.48235297],
[ 0.6 , 0.4901961 , 0.48235297],
[ 0.6313726 , 0.5294118 , 0.5137255 ],
...,
[-0.20784312, -0.52156866, -0.7490196 ],
[-0.16862744, -0.40392154, -0.6627451 ],
[-0.19215685, -0.41176468, -0.6784314 ]],
[[ 0.54509807, 0.45098042, 0.41960788],
[ 0.47450984, 0.3803922 , 0.34901965],
[ 0.5372549 , 0.4431373 , 0.4039216 ],
...,
[-0.14509803, -0.42745095, -0.6627451 ],
[-0.3098039 , -0.5686275 , -0.827451 ],
[-0.34117645, -0.5921569 , -0.8509804 ]],
...,
[[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
...,
[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
[-1. , -1. , -1. ]],
[[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
...,
[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
[-1. , -1. , -1. ]],
[[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
...,
[-1. , -1. , -1. ],
[-1. , -1. , -1. ],
[-1. , -1. , -1. ]]], dtype=float32)
# Predict the face mask for the single preprocessed image (batch of 1).
y_pred = model.predict(np.array([feat_scaled]))
y_pred
array([[[[0.06614217],
[0.03107181],
[0.02338028],
...,
[0.0247125 ],
[0.03345716],
[0.05858895]],
[[0.09642705],
[0.0230692 ],
[0.02448079],
...,
[0.02521592],
[0.03388017],
[0.04617095]],
[[0.05466384],
[0.02802664],
[0.03126341],
...,
[0.03188828],
[0.03482568],
[0.04438928]],
...,
[[0.03160331],
[0.03991133],
[0.03206226],
...,
[0.05201855],
[0.03160545],
[0.03561294]],
[[0.04729846],
[0.02955183],
[0.02998626],
...,
[0.03511113],
[0.03256804],
[0.03893489]],
[[0.10276005],
[0.04556322],
[0.05068821],
...,
[0.05542785],
[0.05803588],
[0.09601912]]]], dtype=float32)
# Threshold the predicted per-pixel values (in [0, 1]) at 0.5 to obtain a
# binary mask; 1.0 * bool-array casts it to float before resizing.
pred_mask = cv2.resize((1.0*(y_pred[0] > 0.5)), (IMAGE_WIDTH,IMAGE_HEIGHT))
Viewing the predicted image and its face-detected output
# Show the preprocessed input; values are in [-1, 1], so imshow clips them.
plt.imshow(feat_scaled)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
<matplotlib.image.AxesImage at 0x7ff763dc2810>
# Show the predicted binary face mask.
plt.imshow(pred_mask)
<matplotlib.image.AxesImage at 0x7ff7681b64d0>
• DOMAIN: Face recognition
• CONTEXT: Company X intends to build a face identification model to recognise human faces.
• DATA DESCRIPTION: The dataset comprises of images and its mask where there is a human face.
• PROJECT OBJECTIVE: Face Aligned Face Dataset from Pinterest. This dataset contains 10,770 images for 100 people. All images are taken
from 'Pinterest' and aligned using dlib library. Some data samples:
• TASK: In this problem, we use a pre-trained model trained on Face recognition to recognise similar faces. Here, we are particularly
interested in recognising whether two given faces are of the same person or not. Below are the steps involved in the project.
# Imports
import pandas as pd, numpy as np, matplotlib.pyplot as plt, sklearn, re, random
import matplotlib.gridspec as gridspec
from tqdm.notebook import tqdm
import tensorflow, cv2
%matplotlib inline
# Extract content from zipfile
from zipfile import ZipFile
# Model
from tensorflow.keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D, Dropout, Flatten, Activation
from tensorflow.keras.models import Sequential, Model
from sklearn.svm import SVC
# Encode, standardize and PCA
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
# Set random_state
random_state = 2020
# Suppress warnings, if any
import warnings; warnings.filterwarnings('ignore')
# Print versions
print(f'Pandas version: {pd.__version__}')
print(f'Numpy version: {np.__version__}')
print(f'Scikit-learn version: {sklearn.__version__}')
print(f'Tensorflow version: {tensorflow.__version__}')
print(f'CV version: {cv2.__version__}')
Pandas version: 1.3.5 Numpy version: 1.21.5 Scikit-learn version: 1.0.2 Tensorflow version: 2.8.0 CV version: 4.1.2
!ls
Aligned+Face+Dataset+from+Pinterest+-+CV+project+1.zip images.npy 'Part 1Test Data - Prediction Image.jpeg'
# Extract the Pinterest faces archive into the working directory.
# FIX: the context variable was named `zip`, shadowing the builtin; renamed.
with ZipFile('Aligned+Face+Dataset+from+Pinterest+-+CV+project+1.zip', 'r') as archive:
    archive.extractall()
Helper class and function to load image metadata
class IdentityMetadata():
    """A single dataset image: base directory, identity folder, file name."""

    def __init__(self, base, name, file):
        self.base = base    # dataset base directory
        self.name = name    # identity (person) directory name
        self.file = file    # image file name

    def __repr__(self):
        return self.image_path()

    def image_path(self):
        """Full on-disk path of the image file."""
        return os.path.join(self.base, self.name, self.file)
def load_metadata(path):
    """Scan `path` for identity sub-directories and collect jpg/jpeg images.

    Returns (metadata, exts): a numpy array of IdentityMetadata entries and
    the parallel list of file extensions.
    """
    metadata = []
    exts = []
    for identity in os.listdir(path):
        for fname in os.listdir(os.path.join(path, identity)):
            ext = os.path.splitext(fname)[1]
            # FIX: compare case-insensitively so '.JPG'/'.JPEG' files
            # are not silently skipped (the original equality test was
            # lowercase-only).
            if ext.lower() in ('.jpg', '.jpeg'):
                metadata.append(IdentityMetadata(path, identity, fname))
                exts.append(ext)
    return np.array(metadata), exts
# Scan the extracted PINS directory; labels holds each image's identity name.
metadata, exts = load_metadata('PINS')
labels = np.array([meta.name for meta in metadata])
Define function to load image
def load_image(path):
    """Read an image from disk and return it in RGB channel order.

    Raises FileNotFoundError if OpenCV cannot read the file; cv2.imread
    returns None instead of raising, which previously surfaced as a
    confusing TypeError on the channel-reversal line.
    """
    img = cv2.imread(path, 1)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {path}')
    # OpenCV loads images with color channels in BGR order,
    # so reverse the channel axis to get RGB.
    return img[..., ::-1]
Load a sample image
# Show one randomly chosen dataset image with its identity as the title.
# FIX: the lower bound was 1, which made index 0 unreachable; np.random.randint's
# upper bound is exclusive, so len(metadata) is correct.
n = np.random.randint(0, len(metadata))
img_path = metadata[n].image_path()
img = load_image(img_path)
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
# Folder names look like 'pins_Name'; the part after '_' is the display name.
title = labels[n].split('_')[1]
ax.set_title(title, fontsize = 20)
_ = plt.imshow(img)
VGG Face model
def vgg_face():
    """Build the VGG-Face architecture (VGG16-style conv stages + FC-as-conv head).

    The layer sequence must match vgg_face_weights.h5 exactly, since the
    pre-trained weights are loaded by position.
    """
    # (number of 3x3 conv layers, filters) for the five convolutional stages.
    stages = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]
    model = Sequential()
    # The very first padding layer also fixes the input shape.
    model.add(ZeroPadding2D((1, 1), input_shape = (224, 224, 3)))
    first_conv = True
    for n_convs, filters in stages:
        for _ in range(n_convs):
            if not first_conv:
                model.add(ZeroPadding2D((1, 1)))
            first_conv = False
            model.add(Convolution2D(filters, (3, 3), activation = 'relu'))
        model.add(MaxPooling2D((2, 2), strides = (2, 2)))
    # Fully-connected layers expressed as convolutions over the 7x7 feature map.
    model.add(Convolution2D(4096, (7, 7), activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(4096, (1, 1), activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation('softmax'))
    return model
Load the model
# Instantiate the VGG-Face architecture and load the pre-trained weights.
# Weights are matched to layers by position, so the architecture must be exact.
model = vgg_face()
model.load_weights('/content/drive/MyDrive/CV2 Project/vgg_face_weights.h5')
print(model.summary())
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
zero_padding2d_26 (ZeroPadd (None, 226, 226, 3) 0
ing2D)
conv2d_41 (Conv2D) (None, 224, 224, 64) 1792
zero_padding2d_27 (ZeroPadd (None, 226, 226, 64) 0
ing2D)
conv2d_42 (Conv2D) (None, 224, 224, 64) 36928
max_pooling2d_10 (MaxPoolin (None, 112, 112, 64) 0
g2D)
zero_padding2d_28 (ZeroPadd (None, 114, 114, 64) 0
ing2D)
conv2d_43 (Conv2D) (None, 112, 112, 128) 73856
zero_padding2d_29 (ZeroPadd (None, 114, 114, 128) 0
ing2D)
conv2d_44 (Conv2D) (None, 112, 112, 128) 147584
max_pooling2d_11 (MaxPoolin (None, 56, 56, 128) 0
g2D)
zero_padding2d_30 (ZeroPadd (None, 58, 58, 128) 0
ing2D)
conv2d_45 (Conv2D) (None, 56, 56, 256) 295168
zero_padding2d_31 (ZeroPadd (None, 58, 58, 256) 0
ing2D)
conv2d_46 (Conv2D) (None, 56, 56, 256) 590080
zero_padding2d_32 (ZeroPadd (None, 58, 58, 256) 0
ing2D)
conv2d_47 (Conv2D) (None, 56, 56, 256) 590080
max_pooling2d_12 (MaxPoolin (None, 28, 28, 256) 0
g2D)
zero_padding2d_33 (ZeroPadd (None, 30, 30, 256) 0
ing2D)
conv2d_48 (Conv2D) (None, 28, 28, 512) 1180160
zero_padding2d_34 (ZeroPadd (None, 30, 30, 512) 0
ing2D)
conv2d_49 (Conv2D) (None, 28, 28, 512) 2359808
zero_padding2d_35 (ZeroPadd (None, 30, 30, 512) 0
ing2D)
conv2d_50 (Conv2D) (None, 28, 28, 512) 2359808
max_pooling2d_13 (MaxPoolin (None, 14, 14, 512) 0
g2D)
zero_padding2d_36 (ZeroPadd (None, 16, 16, 512) 0
ing2D)
conv2d_51 (Conv2D) (None, 14, 14, 512) 2359808
zero_padding2d_37 (ZeroPadd (None, 16, 16, 512) 0
ing2D)
conv2d_52 (Conv2D) (None, 14, 14, 512) 2359808
zero_padding2d_38 (ZeroPadd (None, 16, 16, 512) 0
ing2D)
conv2d_53 (Conv2D) (None, 14, 14, 512) 2359808
max_pooling2d_14 (MaxPoolin (None, 7, 7, 512) 0
g2D)
conv2d_54 (Conv2D) (None, 1, 1, 4096) 102764544
dropout_4 (Dropout) (None, 1, 1, 4096) 0
conv2d_55 (Conv2D) (None, 1, 1, 4096) 16781312
dropout_5 (Dropout) (None, 1, 1, 4096) 0
conv2d_56 (Conv2D) (None, 1, 1, 2622) 10742334
flatten_2 (Flatten) (None, 2622) 0
activation_11 (Activation) (None, 2622) 0
=================================================================
Total params: 145,002,878
Trainable params: 145,002,878
Non-trainable params: 0
_________________________________________________________________
None
Get vgg_face_descriptor
# Embedding model: everything up to the second-to-last layer (the flattened
# 2622-d output), dropping the final softmax classification layer.
vgg_face_descriptor = Model(inputs = model.layers[0].input, outputs = model.layers[-2].output)
Generate embeddings for each image in the dataset
# Get embedding vector for first image in the metadata using the pre-trained model
img_path = metadata[0].image_path()
img = load_image(img_path)
# Normalising pixel values from [0-255] to [0-1]: scale RGB values to interval [0, 1]
img = (img / 255.).astype(np.float32)
img = cv2.resize(img, dsize = (224, 224))
print(img.shape)
# Obtain embedding vector for an image
# Get the embedding vector for the above image using vgg_face_descriptor model and print the shape
# expand_dims adds the batch axis; [0] strips it again from the prediction.
embedding_vector = vgg_face_descriptor.predict(np.expand_dims(img, axis = 0))[0]
print(embedding_vector.shape)
(224, 224, 3) (2622,)
Generate embeddings for all images
# Compute a 2622-d VGG-Face embedding for every image in the dataset.
# (The original also created a dead `embeddings = []` list that was
# immediately overwritten -- removed.)
embeddings = np.zeros((metadata.shape[0], 2622))
for i, meta in tqdm(enumerate(metadata)):
    try:
        image = load_image(str(meta))
        # Same preprocessing as the single-image demo: scale to [0, 1], resize.
        image = (image/255.).astype(np.float32)
        image = cv2.resize(image, (224, 224))
        embeddings[i] = vgg_face_descriptor.predict(np.expand_dims(image, axis = 0))[0]
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt /
        # SystemExit. Failed images keep an all-zero embedding.
        embeddings[i] = np.zeros(2622)
Function to calculate distance between given 2 pairs of images.
def distance(emb1, emb2):
    """Squared Euclidean distance between two embedding vectors."""
    diff = emb1 - emb2
    return np.sum(diff * diff)
Plot images and get distance between the pairs given below
2, 3 and 2, 180
30, 31 and 30, 100
70, 72 and 70, 115
def show_pair(idx1, idx2):
    """Display two dataset images side by side, titled with their embedding distance."""
    d = distance(embeddings[idx1], embeddings[idx2])
    plt.figure(figsize = (8, 3))
    plt.suptitle(f'Distance = {d:.2f}')
    for position, idx in ((121, idx1), (122, idx2)):
        plt.subplot(position)
        plt.imshow(load_image(metadata[idx].image_path()))
# Same-person vs different-person pairs: distance should be small for the
# first call of each pair and large for the second.
show_pair(2, 3)
show_pair(2, 180)
# FIX: the original repeated (2, 3)/(2, 180) here; the intended second
# pair (listed just above) was 30, 31 and 30, 100.
show_pair(30, 31)
show_pair(30, 100)
show_pair(70, 72)
show_pair(70, 115)
Create train and test sets
# Hold out every 9th image as the test set; everything else is training data.
position = np.arange(metadata.shape[0]) % 9
train_idx = position != 0
test_idx = position == 0
# Features: copy the embedding rows belonging to each split.
X_train = np.array(embeddings)[train_idx]
X_test = np.array(embeddings)[test_idx]
# Labels: the identity (directory) name of each image.
y_train = np.array([m.name for m in metadata[train_idx]])
y_test = np.array([m.name for m in metadata[test_idx]])
display(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
(9573, 2622)
(1197, 2622)
(9573,)
(1197,)
Encode the Labels
# Encode labels
# Map identity strings to integer class ids; fit on train and reuse on test
# so both splits share one encoding (every test identity must occur in train).
en = LabelEncoder()
y_train = en.fit_transform(y_train)
y_test = en.transform(y_test)
Standardize the feature values
# Standardize features: zero mean / unit variance per embedding dimension.
# The scaler is fitted on the training split only, to avoid test-set leakage.
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)
Reduce dimensions using PCA
# Eigen-decomposition of the standardized feature covariance (2622 x 2622),
# used below to choose the PCA dimensionality.
cov_matrix = np.cov(X_train_sc.T)
# FIX: use the symmetric solver eigh instead of eig -- the covariance matrix
# is symmetric, and eigh is faster and guarantees real eigenvalues (eig can
# return spurious complex parts from numerical error).
eig_vals, eig_vecs = np.linalg.eigh(cov_matrix)
# Percentage of total variance explained by each component, largest first.
tot = sum(eig_vals)
var_exp = [(i /tot) * 100 for i in sorted(eig_vals, reverse = True)]
cum_var_exp = np.cumsum(var_exp)
print('Cumulative Variance Explained', cum_var_exp)
Cumulative Variance Explained [ 13.58890702 18.98690257 22.97728226 ... 99.99999983 99.99999999 100. ]
# Get index where cumulative variance explained is > threshold
thres = 95
# FIX: replaces filter(...) + tolist().index(...) -- a double scan that
# relied on float-equality lookup -- with a direct vectorized search.
# cum_var_exp is monotonically increasing, so the first nonzero of the mask
# is the first element above the threshold. (IndexError if the threshold is
# never reached, same failure mode as the original.)
above = np.flatnonzero(np.asarray(cum_var_exp) > thres)
index = int(above[0])
# NOTE(review): `index` is later used as n_components; strictly the number
# of components needed is index + 1 -- kept as-is to preserve the original result.
print(f'Index of element just greater than {thres}: {str(index)}')
Index of element just greater than 95: 347
# Plotting: scree plot of individual and cumulative explained variance.
plt.figure(figsize = (15 , 7.2))
plt.bar(range(1, eig_vals.size + 1), var_exp, alpha = 0.5, align = 'center', label = 'Individual explained variance')
plt.step(range(1, eig_vals.size + 1), cum_var_exp, where = 'mid', label = 'Cumulative explained variance')
# Dashed guides mark the 95% variance threshold and the component count reaching it.
plt.axhline(y = thres, color = 'r', linestyle = '--')
plt.axvline(x = index, color = 'r', linestyle = '--')
plt.ylabel('Explained Variance Ratio')
plt.xlabel('Principal Components')
plt.legend(loc = 'best')
plt.tight_layout()
plt.show()
# Reducing the dimensions
# Keep the number of components that explains ~95% of the variance (347,
# found above). whiten=True rescales components to unit variance, which
# suits the RBF-kernel SVM trained downstream.
pca = PCA(n_components = index, random_state = random_state, svd_solver = 'full', whiten = True)
pca.fit(X_train_sc)
X_train_pca = pca.transform(X_train_sc)
X_test_pca = pca.transform(X_test_sc)
display(X_train_pca.shape, X_test_pca.shape)
(9573, 347)
(1197, 347)
Build a Classifier
# RBF-kernel SVM on the 347-dim PCA features; class_weight='balanced'
# compensates for unequal image counts per identity.
svc_pca = SVC(C = 1, gamma = 0.001, kernel = 'rbf', class_weight = 'balanced', random_state = random_state)
svc_pca.fit(X_train_pca, y_train)
print('SVC accuracy for train set: {0:.3f}'.format(svc_pca.score(X_train_pca, y_train)))
SVC accuracy for train set: 0.995
Test results
def sample_img_plot(sample_idx):
    """Run the full recognition pipeline on one test-set image.

    Returns (image, actual_name, predicted_name). Relies on the globals
    metadata, test_idx, vgg_face_descriptor, sc, pca, svc_pca and en.
    """
    meta = metadata[test_idx][sample_idx]
    sample_img = load_image(meta.image_path())
    # Ground-truth name comes from the identity folder, e.g. 'pins_some name'.
    actual_name = meta.name.split('_')[-1].title().strip()
    # Preprocess exactly like the embedding-generation step: [0, 1], 224x224.
    sample_img = cv2.resize((sample_img/255.).astype(np.float32), (224, 224))
    # Embed, standardize, project onto principal components, classify.
    embedding = vgg_face_descriptor.predict(np.expand_dims(sample_img, axis = 0))[0]
    scaled = sc.transform(embedding.reshape(1, -1))
    sample_pred = svc_pca.predict(pca.transform(scaled))
    # Decode the integer prediction back to a display name.
    pred_name = en.inverse_transform(sample_pred)[0].split('_')[-1].title().strip()
    return sample_img, actual_name, pred_name
# Plot for 11th image in test data
sample_img, actual_name, pred_name = sample_img_plot(11)
fig = plt.figure(figsize = (15, 7.2))
plt.axis('off')
plt.imshow(sample_img)
# Title is green when the prediction matches the ground truth, red otherwise.
plt.title(f"A: {actual_name} \n P: {pred_name}", color = 'green' if actual_name == pred_name else 'red')
plt.show()
# Random 20 sample images from test data, with actual (A) vs predicted (P) names.
plt.figure(figsize = (15, 15))
gs1 = gridspec.GridSpec(5, 4)
gs1.update(wspace = 0, hspace = 0.3)
for i in range(20):
    ax1 = plt.subplot(gs1[i])
    plt.axis('on')
    ax1.set_xticklabels([])
    ax1.set_yticklabels([])
    ax1.set_aspect('equal')
    # FIX: random.randint(1, 1197) is inclusive on BOTH ends, so it could
    # produce 1197 (an IndexError for the 1197-element test set) and could
    # never pick index 0. randrange(1197) samples the valid range 0..1196.
    sample_img, actual_name, pred_name = sample_img_plot(random.randrange(1197))
    plt.axis('off')
    plt.imshow(sample_img)
    plt.title(f"A: {actual_name} \n P: {pred_name}", color = 'green' if actual_name == pred_name else 'red')
plt.show()
# Alternative, lighter pipeline: 128-component randomized PCA + near-hard-margin SVC.
from sklearn.decomposition import PCA
pca = PCA(n_components=128, svd_solver='randomized', whiten=True)
# NOTE(review): this overwrites X_train/X_test with their PCA projections and
# rebinds `pca`, replacing the earlier 347-component pipeline objects. The raw
# (unstandardized) embeddings are projected here -- no StandardScaler is applied.
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
from sklearn.svm import SVC
# C=1e7 effectively disables regularization; gamma='auto' is 1/n_features.
clf = SVC(kernel='rbf', class_weight=None , C=10000000, gamma='auto')
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
0.9582289055973267
import warnings
# Suppress LabelEncoder warning
warnings.filterwarnings('ignore')
# Show one held-out example with the identity predicted by the second SVC.
example_idx = 55
example_image = load_image(metadata[test_idx][example_idx].image_path())
# X_test is already PCA-projected at this point, so it can be fed to clf
# directly; predict expects a 2-D array, hence the single-element list.
example_prediction = clf.predict([X_test[example_idx]])
example_identity = en.inverse_transform(example_prediction)[0]
plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');
Import the test image and display it.
import cv2
from google.colab.patches import cv2_imshow
# Load the two unseen test images.
# NOTE(review): cv2.imread returns BGR channel order, while the training
# embeddings were computed from RGB images (load_image reverses the
# channels) -- confirm the channel order before embedding these images.
img1 = cv2.imread('/content/drive/MyDrive/CV2 Project/Part 2 - Test Image - Dwayne Johnson4.jpg', cv2.IMREAD_UNCHANGED)
cv2_imshow(img1)
import cv2
from google.colab.patches import cv2_imshow
img2 = cv2.imread('/content/drive/MyDrive/CV2 Project/Part 2- Test Image - Benedict Cumberbatch9.jpg', cv2.IMREAD_UNCHANGED)
cv2_imshow(img2)
img1.shape , img2.shape
((299, 299, 3), (299, 299, 3))
# Preprocess the two test images exactly like the training pipeline:
# resize to the VGG input size, convert BGR (cv2.imread) -> RGB, and scale
# pixels to [0, 1]. FIX: the original skipped both the colour conversion
# and the [0, 1] scaling, and it also standardized the embedding with `sc`
# even though `clf` was trained on the PCA of the *unstandardized*
# embeddings -- together these mismatches made the prediction come out as
# the wrong identity ('Emilia Clarke').
img_pred_resize1 = cv2.resize(img1, (224, 224), interpolation = cv2.INTER_CUBIC)
img_pred_resize2 = cv2.resize(img2, (224, 224), interpolation = cv2.INTER_CUBIC)
# BGR -> RGB, then normalise to [0, 1] as float32 (matches the embedding loop).
pred_img1 = (img_pred_resize1[..., ::-1] / 255.).astype(np.float32)
pred_img2 = (img_pred_resize2[..., ::-1] / 255.).astype(np.float32)
actual_name1 = "Dwayne Johnson"
actual_name2 = "Benedict Cumberbatch"
embedding1 = vgg_face_descriptor.predict(np.expand_dims(pred_img1, axis = 0))[0]
embedding2 = vgg_face_descriptor.predict(np.expand_dims(pred_img2, axis = 0))[0]
# Project straight onto the PCA basis fitted on raw embeddings (no scaler),
# mirroring how clf's training features were produced.
sample_pred = clf.predict(pca.transform(embedding1.reshape(1, -1)))
pred_name = en.inverse_transform(sample_pred)[0].split('_')[-1].title().strip()
pred_name
'Emilia Clarke'